name: lint

# Reusable workflow: it only runs when another workflow calls it.
on:
  workflow_call:
    inputs:
      # Folder the lint steps run in; the caller must always supply it.
      working-directory:
        description: 'From which folder this pipeline executes'
        type: string
        required: true

# Workflow-level environment shared by the job and steps below.
env:
  # Resolves to '.' when the `working-directory` input is empty,
  # otherwise to the input itself.
  WORKDIR: "${{ inputs.working-directory == '' && '.' || inputs.working-directory }}"
  # Passed to the `poetry_setup` action as its `poetry-version` input.
  POETRY_VERSION: '1.5.1'

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Lint only on the oldest and newest supported Python versions:
        # a lint problem that appears solely on an in-between version,
        # and on neither end of the range, is extremely unlikely.
        #
        # Fewer versions also means faster CI, because GitHub rate-limits
        # the number of concurrently running jobs and each new job is
        # relatively slow to start.
        python-version: ["3.8", "3.11"]
    env:
      # Chosen "by eye". It has to exceed the number of commits in any
      # reasonable PR, yet stay as small as possible because a larger
      # value makes the initial `git fetch` slower.
      FETCH_DEPTH: 50
    steps:
      - uses: "actions/checkout@v3"
        with:
          # Shallow clone limited to the last FETCH_DEPTH commits. That is the
          # history the mtime-changing script relies on to accurately set the
          # mtimes of files modified within those commits.
          fetch-depth: "${{ env.FETCH_DEPTH }}"
      - name: Restore workdir file mtimes to last-edited commit date
        id: restore-mtimes
        # This is needed to make black caching work.
        # Black's cache uses file (mtime, size) to check whether a lookup is a cache hit.
        # Without this command, files in the repo would have the current time as the modified time,
        # since the previous action step just created them.
        # This command resets the mtime to the last time the files were modified in git instead,
        # which is a high-quality and stable representation of the last modification date.
        #
        # Step output:
        #   oldest-commit: hash of the oldest commit present in the shallow clone.
        #                  The "Restore black cache" step appends it to its cache key.
        run: |
          # Important considerations:
          # - These commands run at base of the repo, since we never `cd` to the `WORKDIR`.
          # - We only want to alter mtimes for Python files, since that's all black checks.
          # - We don't need to alter mtimes for directories, since black doesn't look at those.
          # - We also only alter mtimes inside the `WORKDIR` since that's all we'll lint.
          # - This should run before `poetry install`, because poetry's venv also contains
          #   Python files, and we don't want to alter their mtimes since they aren't linted.

          # Ensure we fail on non-zero exits and on undefined variables.
          # Also print executed commands, for easier debugging.
          set -eux

          # Restore the mtimes of Python files in the workdir based on git history.
          .github/tools/git-restore-mtime --no-directories "$WORKDIR/**/*.py"

          # Since CI only does a partial fetch (to `FETCH_DEPTH`) for efficiency,
          # the local git repo doesn't have full history. There are probably files
          # that were last modified in a commit *older than* the oldest fetched commit.
          # After `git-restore-mtime`, such files have a mtime set to the oldest fetched commit.
          #
          # As new commits get added, that timestamp will keep moving forward.
          # If left unchanged, this will make `black` think that the files were edited
          # more recently than its cache suggests. Instead, we can set their mtime
          # to a fixed date in the far past that won't change and won't cause cache misses in black.
          #
          # For all workdir Python files whose mtime is at or before the timestamp
          # of the oldest fetched commit, make their mtime be 2000-01-01 00:00:00.
          OLDEST_COMMIT="$(git log --reverse '--pretty=format:%H' | head -n 1)"
          OLDEST_COMMIT_TIME="$(git show -s '--format=%ai' "$OLDEST_COMMIT")"
          find "$WORKDIR" -name '*.py' -type f -not -newermt "$OLDEST_COMMIT_TIME" -exec touch -c -m -t '200001010000' '{}' '+'

          # Publish the oldest fetched commit hash as this step's `oldest-commit` output.
          echo "oldest-commit=$OLDEST_COMMIT" >> "$GITHUB_OUTPUT"

      # Delegates to the repository-local `poetry_setup` action, handing it the
      # matrix Python version and the workflow-wide Poetry version.
      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: ./.github/actions/poetry_setup
        with:
          working-directory: ${{ inputs.working-directory }}
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          # Changing this value stops the action from reusing its old cache.
          cache-key: lint-with-extras

      - name: Check Poetry File
        working-directory: ${{ inputs.working-directory }}
        shell: bash
        # Runs Poetry's own check from inside the working directory.
        run: poetry check

      - name: Check lock file
        working-directory: ${{ inputs.working-directory }}
        shell: bash
        # Runs Poetry's lock-file check from inside the working directory.
        run: poetry lock --check

      - name: Install dependencies
        working-directory: ${{ inputs.working-directory }}
        # The dev/lint/test/typing groups are installed as well, so that type
        # hints are available for as many of our libraries as possible.
        # Some errors can only be spotted when those dependencies are present, e.g.:
        # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
        #
        # When this configuration changes, also change the `cache-key` given to
        # the `poetry_setup` action above so the old cache is no longer used.
        # Any change to the key, whatever it is, causes a cache-bust.
        run: poetry install --with dev,lint,test,typing

      - name: Restore black cache
        uses: actions/cache@v3
        env:
          # Shared prefix of the cache key: OS, architecture, Python version,
          # working directory and a hash of the workdir's poetry.lock.
          CACHE_BASE: black-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
          # Give up on a stuck cache segment download after 1 minute.
          # actions/cache reads the plural name `SEGMENT_DOWNLOAD_TIMEOUT_MINS`;
          # the previous singular spelling was ignored.
          SEGMENT_DOWNLOAD_TIMEOUT_MINS: "1"
        with:
          path: |
            ${{ env.WORKDIR }}/.black_cache
          # Exact key: the shared prefix plus the oldest fetched commit hash
          # published by the `restore-mtimes` step.
          key: ${{ env.CACHE_BASE }}-${{ steps.restore-mtimes.outputs.oldest-commit }}
          # If we can't find an exact match for our cache key, accept any with this prefix.
          restore-keys: |
            ${{ env.CACHE_BASE }}-

      - name: Get .mypy_cache to speed up mypy
        uses: actions/cache@v3
        env:
          # Give up on a stuck cache segment download after 2 minutes.
          # actions/cache reads the plural name `SEGMENT_DOWNLOAD_TIMEOUT_MINS`;
          # the previous singular spelling was ignored.
          SEGMENT_DOWNLOAD_TIMEOUT_MINS: "2"
        with:
          path: |
            ${{ env.WORKDIR }}/.mypy_cache
          # Keyed on OS, architecture, Python version, working directory and
          # a hash of the workdir's poetry.lock.
          key: mypy-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}

      - name: Analysing the code with our lint
        env:
          # Relative path; presumably resolved against this step's working
          # directory, matching the `.black_cache` folder cached earlier.
          BLACK_CACHE_DIR: .black_cache
        working-directory: ${{ inputs.working-directory }}
        run: make lint
